In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs  # samples_generator was removed in newer scikit-learn
%matplotlib inline
Order of models:
I. a OneVsAllClassifier for the three class clusters below
-> Ia. normalizing the boundary vectors
-> Ib. tuning the regularization parameter C
II. a multiclass SVM for the same data
III. a multiclass SVM for NYC urban sound files (not trained yet)
In [2]:
# Create the training data
np.random.seed(2)
X, y = make_blobs(n_samples=300,cluster_std=.25, centers=np.array([(-3,1),(0,2),(3,1)]))
plt.scatter(X[:, 0], X[:, 1], c=y, s=50)
Out[2]:
In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from numpy import linalg as L
class OneVsAllClassifier(BaseEstimator, ClassifierMixin):
"""
One-vs-all classifier
We assume that the classes will be the integers 0,..,(n_classes-1).
We assume that the estimator provided to the class, after fitting, has a "decision_function" that
returns the score for the positive class.
"""
def __init__(self, estimator, n_classes):
"""
Constructed with the number of classes and an estimator (e.g. an
SVM estimator from sklearn)
@param estimator : binary base classifier used
@param n_classes : number of classes
"""
self.n_classes = n_classes
self.estimators = [clone(estimator) for _ in range(n_classes)]
self.fitted = False
def fit(self, X, y=None):
"""
This should fit one classifier for each class.
self.estimators[i] should be fit on class i vs rest
@param X: array-like, shape = [n_samples,n_features], input data
@param y: array-like, shape = [n_samples,] class labels
@return returns self
"""
for i in range(self.n_classes):
self.estimators[i].fit(X, y==i)
self.fitted = True
return self
def decision_function(self, X):
"""
Returns the score of each input for each class. Assumes
that the given estimator also implements the decision_function method (which sklearn SVMs do),
and that fit has been called.
@param X : array-like, shape = [n_samples, n_features] input data
@return array-like, shape = [n_samples, n_classes]
"""
if not self.fitted:
            raise RuntimeError("You must train the classifier before predicting data.")
if not hasattr(self.estimators[0], "decision_function"):
raise AttributeError(
"Base estimator doesn't have a decision_function attribute.")
        classes_score = [est.decision_function(X) for est in self.estimators]
        # Stack the per-class scores into an [n_samples, n_classes] array
        return np.column_stack(classes_score)
def predict(self, X):
"""
Predict the class with the highest score.
@param X: array-like, shape = [n_samples,n_features] input data
@returns array-like, shape = [n_samples,] the predicted classes for each input
"""
        decisions = self.decision_function(X)
        # Return the class with the highest score for each sample
        return np.argmax(decisions, axis=1)
In [4]:
#Here we test the OneVsAllClassifier
from sklearn import svm
svm_estimator = svm.LinearSVC(loss='hinge', fit_intercept=False, C=5)
clf_onevsall = OneVsAllClassifier(svm_estimator, n_classes=3)
clf_onevsall.fit(X,y)
for i in range(3):
print("Coeffs %d"%i)
print(clf_onevsall.estimators[i].coef_) #Will fail if you haven't implemented fit yet
print( clf_onevsall.estimators[i].coef_/L.norm(clf_onevsall.estimators[i].coef_))
# create a mesh to plot in
h = .02 # step size in the mesh
x_min, x_max = min(X[:,0])-3,max(X[:,0])+3
y_min, y_max = min(X[:,1])-3,max(X[:,1])+3
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
mesh_input = np.c_[xx.ravel(), yy.ravel()]
Z = clf_onevsall.predict(mesh_input)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
from sklearn import metrics
metrics.confusion_matrix(y, clf_onevsall.predict(X))
Out[4]:
Notice the trade-off between margin width and classification accuracy as the regularization parameter C is tuned.
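To quantify the accuracy side of that trade-off, here is a minimal sketch (assuming the X, y, svm, and OneVsAllClassifier objects defined above) that prints training accuracy for the same values of C swept below:
In [ ]:
# Sketch: training accuracy as C grows (the margin narrows, training accuracy typically rises)
for c in [1, 50, 100, 200, 500, 1000]:
    est_c = svm.LinearSVC(loss='hinge', fit_intercept=False, C=c)
    acc = OneVsAllClassifier(est_c, n_classes=3).fit(X, y).score(X, y)
    print("C=%d: train accuracy %.3f" % (c, acc))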
In [5]:
# create a mesh to plot in
h = .02 # step size in the mesh
x_min, x_max = min(X[:,0])-3,max(X[:,0])+3
y_min, y_max = min(X[:,1])-3,max(X[:,1])+3
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
mesh_input = np.c_[xx.ravel(), yy.ravel()]
def f(parameter):
svm_estimator = svm.LinearSVC(loss='hinge', fit_intercept=False, C=parameter)
clf_onevsall = OneVsAllClassifier(svm_estimator, n_classes=3)
clf_onevsall.fit(X,y)
Z = clf_onevsall.predict(mesh_input)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.2)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
for c in [1, 50, 100, 200, 500, 1000]:
f(c)
Normalizing the three boundary vectors to unit length reduces the discrepancies between the angles they make with one another, as the quiver plot below shows.
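The soa array below hardcodes the normalized coefficient vectors printed above. A minimal sketch of deriving it from the fitted clf_onevsall instead (signs may differ depending on which class LinearSVC treats as positive; note the -1 flip applied at plotting time):
In [ ]:
# Sketch: each row is [origin_x, origin_y, w_x, w_y] with w scaled to unit length
soa = np.array([
    np.concatenate(([0, 0], est.coef_.ravel() / L.norm(est.coef_)))
    for est in clf_onevsall.estimators
])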
In [6]:
soa = np.array([[0, 0, -0.76080386, -0.64898189], [0, 0, 0.68477689, 0.72875278], [0, 0, 0.73358915, -0.67959323]])
A, B, C, D = zip(*(-1 * soa))
plt.figure()
ax = plt.gca()
ax.quiver(A, B, C, D, scale=1)
ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
ax.set_xlim([-6, 6])
ax.set_ylim([-2, 4])
plt.draw()
plt.show()
Multiclass SVM
In [17]:
def zeroOne(y,a) :
'''
Computes the zero-one loss.
@param y: output class
@param a: predicted class
@return 1 if different, 0 if same
'''
return int(y != a)
def featureMap(X,y,num_classes) :
'''
Computes the class-sensitive features.
@param X: array-like, shape = [n_samples,n_inFeatures] or [n_inFeatures,], input features for input data
    @param y: a target class (in range 0,..,num_classes-1)
    @param num_classes: the total number of classes
@return array-like, shape = [n_samples,n_outFeatures], the class sensitive features for class y
'''
#The following line handles X being a 1d-array or a 2d-array
num_samples, num_inFeatures = (1,X.shape[0]) if len(X.shape) == 1 else (X.shape[0],X.shape[1])
num_outFeatures = num_inFeatures*num_classes
output = np.zeros((num_samples, num_outFeatures))
    if X.ndim == 1:
        # Copy the input into the output block owned by class y
        output[0, y*num_inFeatures:(y+1)*num_inFeatures] = X
    else:
        for i in range(num_samples):
            output[i, y*num_inFeatures:(y+1)*num_inFeatures] = X[i]
    return output
def sgd(X, y, num_outFeatures, subgd, lam = 0.01, eta = 0.1, T = 10000):
'''
Runs subgradient descent, and outputs resulting parameter vector.
@param X: array-like, shape = [n_samples,n_features], input training data
@param y: array-like, shape = [n_samples,], class labels
@param num_outFeatures: number of class-sensitive features
@param subgd: function taking x,y and giving subgradient of objective
    @param lam: l2 regularization parameter
@param eta: learning rate for SGD
@param T: maximum number of iterations
@return: vector of weights
'''
    num_samples = X.shape[0]
    w = np.zeros(num_outFeatures)
    for t in range(T):
        # One full pass over the training set per iteration
        for sample_ind in range(num_samples):
            # The subgradient supplied here already includes the l2 term lam*w,
            # so no separate shrinkage step is applied
            w -= eta * subgd(X[sample_ind], y[sample_ind], w)
    return w
class MulticlassSVM(BaseEstimator, ClassifierMixin):
'''
Implements a Multiclass SVM estimator.
'''
def __init__(self, num_outFeatures, lam=0.01, num_classes=3, Delta=zeroOne, Psi=featureMap):
'''
Creates a MulticlassSVM estimator.
@param num_outFeatures: number of class-sensitive features produced by Psi
@param lam: l2 regularization parameter
@param num_classes: number of classes (assumed numbered 0,..,num_classes-1)
@param Delta: class-sensitive loss function taking two arguments (i.e., target margin)
@param Psi: class-sensitive feature map taking two arguments
'''
self.num_outFeatures = num_outFeatures
self.lam = lam
self.num_classes = num_classes
self.Delta = Delta
self.Psi = lambda X,y : Psi(X,y,num_classes)
self.fitted = False
def subgradient(self,x,y,w):
'''
Computes the subgradient at a given data point x,y
@param x: sample input
@param y: sample class
@param w: parameter vector
@return returns subgradient vector at given x,y,w
'''
        # Loss-augmented inference: find the class maximizing margin loss plus score gap
        scores_ = []
        for i in range(self.num_classes):
            scores_.append(self.Delta(i, y)
                           + np.dot(w, (self.Psi(x, i) - self.Psi(x, y)).reshape(self.num_outFeatures)))
        y_hat = np.argmax(scores_)
        return self.lam*w + (self.Psi(x, y_hat) - self.Psi(x, y)).reshape(self.num_outFeatures)
def fit(self,X,y,eta=0.1,T=10000):
'''
Fits multiclass SVM
@param X: array-like, shape = [num_samples,num_inFeatures], input data
@param y: array-like, shape = [num_samples,], input classes
@param eta: learning rate for SGD
@param T: maximum number of iterations
@return returns self
'''
        self.coef_ = sgd(X, y, self.num_outFeatures, self.subgradient, self.lam, eta, T)
self.fitted = True
return self
    def decision_function(self, X):
'''
Returns the score on each input for each class. Assumes
that fit has been called.
@param X : array-like, shape = [n_samples, n_inFeatures]
@return array-like, shape = [n_samples, n_classes]
giving scores for each sample,class pairing
'''
        if not self.fitted:
            raise RuntimeError("You must train the classifier before predicting data.")
        samples = X.shape[0]
        scores = np.zeros((samples, self.num_classes))
        for s in range(samples):
            for c in range(self.num_classes):
                scores[s, c] = np.dot(self.coef_, self.Psi(X[s], c).T)
        return scores
def predict(self, X):
'''
Predict the class with the highest score.
@param X: array-like, shape = [n_samples, n_inFeatures], input data to predict
@return array-like, shape = [n_samples,], class labels predicted for each data point
'''
        table = self.decision_function(X)
        # Return the class with the highest score for each sample
        return np.argmax(table, axis=1)
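Before testing the full model, a quick sanity check of the class-sensitive feature map; this is a sketch with made-up inputs, assuming three classes and two input features as in the data above:
In [ ]:
# Sketch: featureMap copies x into the block of the output owned by class y
x_demo = np.array([1.0, 2.0])    # hypothetical 2-feature input
print(featureMap(x_demo, 0, 3))  # -> [[1. 2. 0. 0. 0. 0.]]
print(featureMap(x_demo, 2, 3))  # -> [[0. 0. 0. 0. 1. 2.]]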
In the plot below we see that this more complex model also classifies all of the training data correctly.
In [18]:
#the following code tests the MulticlassSVM and sgd
#will fail if MulticlassSVM is not implemented yet
est = MulticlassSVM(6,lam=0.01)
est.fit(X,y)
print("w:")
print(est.coef_)
Z = est.predict(mesh_input)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
from sklearn import metrics
metrics.confusion_matrix(y, est.predict(X))
Out[18]:
Now for multiclass classification of NYC urban sound files!
In [35]:
import librosa as l
path ='../UrbanSound8K/audio/fold1/'
y, sr = l.load(path + '7061-6-0-0.wav')  # note: this rebinds y from the class labels to the audio signal
In [47]:
import IPython
IPython.display.Audio(path + '7061-6-0-0.wav')
Out[47]:
In [45]:
from librosa.display import waveplot, specshow
plt.figure()
plt.subplot(3, 1, 1)
waveplot(y, sr=sr)
plt.title('Waveform')
Out[45]:
In [44]:
D = l.amplitude_to_db(np.abs(l.stft(y)), ref=np.max)
plt.subplot(4, 2, 1)
specshow(D, y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram')
Out[44]:
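The MFCC features extracted below are computed from a mel-scaled version of this spectrogram. A sketch of that intermediate representation, using standard librosa calls:
In [ ]:
# Sketch: mel spectrogram, the representation MFCCs are derived from
S = l.feature.melspectrogram(y=y, sr=sr)
specshow(l.power_to_db(S, ref=np.max), y_axis='mel', x_axis='time')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel-frequency power spectrogram')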
In [122]:
def features(file_path):
    try:
        y, sr = l.load(file_path, duration=5.0, res_type='scipy')
    except Exception:
        print(file_path)
        raise
    # 20 MFCCs averaged over time, plus the mean delta of the raw signal
    mfcc = l.feature.mfcc(y=y, sr=sr).mean(axis=1)
    features = np.append(mfcc, l.feature.delta(y).mean())
    if features.shape[0] != 21:
        raise Exception('sample w/ bad feature')
    return features
#features(path + '7061-6-0-0.wav')
In [57]:
import pandas as pd
meta = pd.read_csv('../UrbanSound8K/metadata/UrbanSound8K.csv')
data_set = meta.sample(n=4000)
data_set.head()
Out[57]:
In [ ]:
array = np.zeros((4000, 21))
labels = np.zeros((4000,))
from os import listdir
i = 0
for folder in range(1,11):
path ='../UrbanSound8K/audio/fold%d/' %(folder)
for file in listdir(path):
if file in data_set['slice_file_name'].values:
labels[i] = data_set[data_set['slice_file_name'] == file]['classID'].values[0]
array[i] = features(path+file)
i += 1
# Standardize each sample's feature vector (per-row mean 0, std 1)
mean = array.mean(axis=1)
array -= mean[:, np.newaxis]
array_normed = array / array.std(axis=1)[:, np.newaxis]
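Note that this standardizes each sample across its own 21 features. Per-feature standardization (over columns) is the more common convention; a sketch of that alternative, assuming array still holds the raw features:
In [ ]:
# Sketch: per-feature (column-wise) standardization instead of per-sample
col_mean = array.mean(axis=0)
col_std = array.std(axis=0)
array_normed_cols = (array - col_mean) / col_std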
In [60]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
data_set['slice_file_name'], data_set['classID'], test_size=0.5, random_state=42)
X_train.shape, X_test.shape, y_train.shape
Out[60]:
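As the outline notes, model III is not trained yet. A minimal sketch of wiring the pieces together, training the MulticlassSVM on the normalized features rather than the file names (parameter choices here are illustrative, not tuned):
In [ ]:
# Sketch: 10 UrbanSound8K classes x 21 features -> 210 class-sensitive features
Xs_train, Xs_test, ys_train, ys_test = train_test_split(
    array_normed, labels.astype(int), test_size=0.5, random_state=42)
sound_svm = MulticlassSVM(21 * 10, lam=0.01, num_classes=10)
sound_svm.fit(Xs_train, ys_train, eta=0.01, T=100)
print(metrics.confusion_matrix(ys_test, sound_svm.predict(Xs_test)))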